In [1]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Perceptron
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from graphhelpers import plot_decision_regions
In [2]:
iris = datasets.load_iris()
print(iris.feature_names)
print(iris.target_names)
X = iris.data[:, [2, 3]]
y = iris.target
print(np.unique(y))
In [3]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.3, random_state=0)
In [4]:
sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)
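As a quick sanity check (a minimal sketch, not part of the original notebook): the scaler was fit on the training set only, so the standardized training features should have roughly zero mean and unit variance, while the test set is transformed with the same training statistics and so comes out close to, but not exactly, (0, 1):

# mean and standard deviation per feature after standardization
print(X_train_std.mean(axis=0).round(3))
print(X_train_std.std(axis=0).round(3))
print(X_test_std.mean(axis=0).round(3))
print(X_test_std.std(axis=0).round(3))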
In [5]:
ppn = Perceptron(max_iter=40, eta0=0.1, random_state=0)  # older scikit-learn versions used n_iter instead of max_iter
ppn.fit(X_train_std, y_train)
Out[5]:
In [6]:
y_pred = ppn.predict(X_test_std)
print('Misclassified samples: {0}'.format((y_pred != y_test).sum()))
print('Total samples: {0}'.format(len(y_pred)))
print('Misclassification error: {0:.3f}'.format((y_pred != y_test).mean()))
print('Accuracy: {0:.3f}'.format(1 - (y_pred != y_test).mean()))
In [7]:
# Using the sklearn.metrics.accuracy_score method
print('Accuracy: {acc:.1f}%'.format(acc=accuracy_score(y_test, y_pred) * 100))
In [8]:
X_combined_std = np.vstack((X_train_std, X_test_std))
y_combined = np.hstack((y_train, y_test))
In [9]:
plot_decision_regions(X=X_combined_std, y=y_combined, classifier=ppn, test_index_range=range(len(X_train_std), len(X_combined_std)))
plt.xlabel('petal length (standardized)')
plt.ylabel('petal width (standardized)')
plt.legend(loc='upper left')
plt.tight_layout()
plt.show()
In [10]:
# logit(p) = log( p / (1 - p) ) = z
# p / (1 - p) = e^z
# (1 - p) / p = e^-z
# (1 / p) - 1 = e^-z
# 1 / p = 1 + e^-z
# p = 1 / (1 + e^-z) = sigmoid(z)
def sigmoid(z):
    return 1. / (1. + np.exp(-z))
In [11]:
z = np.arange(-7, 7, .1)
phi_z = sigmoid(z)
plt.plot(z, phi_z)
plt.axvline(0.0, color='k')
plt.axhspan(0.0, 1.0, facecolor='1.0', alpha=1.0, ls='dotted')
plt.axhline(y=0.5, ls='dotted', color='k')
plt.yticks([0, .5, 1])
plt.ylim(-.1, 1.1)
plt.xlabel('z')
plt.ylabel(r'$\phi(z)$')
plt.show()
In Adaline, the activation function was simply the identity function, phi(z) = z.
In logistic regression, the activation function is the sigmoid function, phi(z) = 1 / (1 + e^-z).
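As a minimal illustrative sketch (not from the original notebook), the two activation functions can be compared side by side using the sigmoid defined above:

# Adaline: the activation is the identity, phi(z) = z
def identity_activation(z):
    return z

z_demo = np.array([-2.0, 0.0, 2.0])
print(identity_activation(z_demo))   # [-2.  0.  2.]
print(sigmoid(z_demo).round(3))      # [0.119 0.5   0.881]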
In [12]:
lr = LogisticRegression(C=1000., random_state=0)
In [13]:
lr.fit(X_train_std,y_train)
plot_decision_regions(X_combined_std, y_combined, classifier=lr, test_index_range=range(len(X_train_std), len(X_combined_std)))
plt.xlabel('petal length (standardized)')
plt.ylabel('petal width (standardized)')
plt.legend(loc='upper left')
plt.show()
In [14]:
# reshape to (1, -1) since it's a single sample
lr.predict_proba(X_test_std[0, :].reshape(1, -1)).round(3)
Out[14]:
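A short follow-up sketch (assumed behavior of scikit-learn's API, not shown in the original output): each row returned by predict_proba sums to 1, and the argmax of that row is the class that predict would return.

proba = lr.predict_proba(X_test_std[0, :].reshape(1, -1))
print(proba.sum(axis=1))              # each row of class probabilities sums to 1
print(np.argmax(proba, axis=1))       # index of the most probable class
print(lr.predict(X_test_std[0, :].reshape(1, -1)))  # agrees with the argmax above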
In [15]:
lr.coef_
Out[15]:
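For reference (an assumption about this setup, since the original output is not shown): with 3 classes and 2 features, lr.coef_ has shape (3, 2), one weight vector per class, and lr.intercept_ holds the three bias terms.

print(lr.coef_.shape)       # (3, 2): one weight vector per class
print(lr.intercept_.shape)  # (3,): one bias per class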
In [16]:
weights, params = [], []
for c in np.arange(-5, 5):
    params.append(10.**c)
    lr = LogisticRegression(C=10.**c, random_state=0)
    lr.fit(X_train_std, y_train)
    weights.append(lr.coef_[1])  # coefficients for the 2nd class (versicolor)
weights = np.array(weights)
In [17]:
plt.plot(params, weights[:, 0], label='petal length')
plt.plot(params, weights[:, 1], label='petal width', linestyle='--')
plt.ylabel('weight coefficient')
plt.xlabel('C')
plt.legend(loc='upper left')
plt.xscale('log')
plt.show()
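One closing sketch (the printed values are assumptions, not outputs from the original run): C is the inverse regularization strength, so a small C means strong L2 regularization and weights shrunk toward zero, while a large C approaches an unregularized fit.

lr_strong = LogisticRegression(C=10.**-5, random_state=0).fit(X_train_std, y_train)
lr_weak = LogisticRegression(C=10.**4, random_state=0).fit(X_train_std, y_train)
print(np.abs(lr_strong.coef_).max())  # near zero: weights heavily shrunk
print(np.abs(lr_weak.coef_).max())    # much larger: almost unregularized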